{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extracting features from time with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-05-17 00:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-05-17 01:15:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-05-17 02:30:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-05-17 03:45:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-05-17 05:00:40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date\n",
       "0 2024-05-17 00:00:00\n",
       "1 2024-05-17 01:15:10\n",
       "2 2024-05-17 02:30:20\n",
       "3 2024-05-17 03:45:30\n",
       "4 2024-05-17 05:00:40"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's create a toy dataframe with some date variables\n",
    "\n",
    "rng_ = pd.date_range(\"2024-05-17\", periods=20, freq=\"1h15min10s\")\n",
    "df = pd.DataFrame({\"date\": rng_})\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>hour</th>\n",
       "      <th>min</th>\n",
       "      <th>sec</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-05-17 00:00:00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-05-17 01:15:10</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-05-17 02:30:20</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-05-17 03:45:30</td>\n",
       "      <td>3</td>\n",
       "      <td>45</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-05-17 05:00:40</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date  hour  min  sec\n",
       "0 2024-05-17 00:00:00     0    0    0\n",
       "1 2024-05-17 01:15:10     1   15   10\n",
       "2 2024-05-17 02:30:20     2   30   20\n",
       "3 2024-05-17 03:45:30     3   45   30\n",
       "4 2024-05-17 05:00:40     5    0   40"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# extract hr, min and sec\n",
    "\n",
    "df[\"hour\"] = df[\"date\"].dt.hour\n",
    "df[\"min\"] = df[\"date\"].dt.minute\n",
    "df[\"sec\"] = df[\"date\"].dt.second\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>hour</th>\n",
       "      <th>min</th>\n",
       "      <th>sec</th>\n",
       "      <th>h</th>\n",
       "      <th>m</th>\n",
       "      <th>s</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-05-17 00:00:00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-05-17 01:15:10</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-05-17 02:30:20</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-05-17 03:45:30</td>\n",
       "      <td>3</td>\n",
       "      <td>45</td>\n",
       "      <td>30</td>\n",
       "      <td>3</td>\n",
       "      <td>45</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-05-17 05:00:40</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date  hour  min  sec  h   m   s\n",
       "0 2024-05-17 00:00:00     0    0    0  0   0   0\n",
       "1 2024-05-17 01:15:10     1   15   10  1  15  10\n",
       "2 2024-05-17 02:30:20     2   30   20  2  30  20\n",
       "3 2024-05-17 03:45:30     3   45   30  3  45  30\n",
       "4 2024-05-17 05:00:40     5    0   40  5   0  40"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the same in one line\n",
    "\n",
    "df[[\"h\", \"m\", \"s\"]] = pd.DataFrame([(x.hour, x.minute, x.second) for x in df[\"date\"]])\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  5,  6,  7,  8, 10, 11, 12, 13, 15, 16, 17, 18, 20,\n",
       "       21, 22, 23])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"hour\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>hour</th>\n",
       "      <th>min</th>\n",
       "      <th>sec</th>\n",
       "      <th>h</th>\n",
       "      <th>m</th>\n",
       "      <th>s</th>\n",
       "      <th>is_morning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-05-17 00:00:00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-05-17 01:15:10</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-05-17 02:30:20</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-05-17 03:45:30</td>\n",
       "      <td>3</td>\n",
       "      <td>45</td>\n",
       "      <td>30</td>\n",
       "      <td>3</td>\n",
       "      <td>45</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-05-17 05:00:40</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 date  hour  min  sec  h   m   s  is_morning\n",
       "0 2024-05-17 00:00:00     0    0    0  0   0   0           0\n",
       "1 2024-05-17 01:15:10     1   15   10  1  15  10           0\n",
       "2 2024-05-17 02:30:20     2   30   20  2  30  20           0\n",
       "3 2024-05-17 03:45:30     3   45   30  3  45  30           0\n",
       "4 2024-05-17 05:00:40     5    0   40  5   0  40           0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# is it morning?\n",
    "\n",
    "df[\"is_morning\"] = np.where((df[\"hour\"] < 12) & (df[\"hour\"] > 6), 1, 0)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
